{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lab 9b - 2-fold cross validation\n", "\n", "We will finish Lab 9 in this notebook." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import statsmodels.formula.api as smf\n", "import seaborn as sns\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import datasets, linear_model\n", "from sklearn.model_selection import KFold\n", "\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datasetxy
0I10.08.04
1I8.06.95
2I13.07.58
3I9.08.81
4I11.08.33
\n", "
" ], "text/plain": [ " dataset x y\n", "0 I 10.0 8.04\n", "1 I 8.0 6.95\n", "2 I 13.0 7.58\n", "3 I 9.0 8.81\n", "4 I 11.0 8.33" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the Anscombe quartet data\n", "anscombe = sns.load_dataset(\"anscombe\")\n", "anscombe.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
2210.07.46
238.06.77
2413.012.74
259.07.11
2611.07.81
2714.08.84
286.06.08
294.05.39
3012.08.15
317.06.42
325.05.73
\n", "
" ], "text/plain": [ " x y\n", "22 10.0 7.46\n", "23 8.0 6.77\n", "24 13.0 12.74\n", "25 9.0 7.11\n", "26 11.0 7.81\n", "27 14.0 8.84\n", "28 6.0 6.08\n", "29 4.0 5.39\n", "30 12.0 8.15\n", "31 7.0 6.42\n", "32 5.0 5.73" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Separate out the Anscombe 3 data\n", "anscombe_3 = anscombe[anscombe[\"dataset\"] == \"III\"]\n", "anscombe_3 = anscombe_3[[\"x\",\"y\"]]\n", "anscombe_3" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XmMnPd9HvDnO9fO7H0OSYn3OSsr8kXJkmVR1K4TOI1hF0GRKmiQxE4h5486gpHWtZvGQYyiNZAUjQADjVXZsQu7dmIlRoQATS3siqJs6yApWecuSfEQuRTJ2fuY672+/eOdWc4s996ZeWfmfT6AQHJ3ufMbinyfmfd93u9PVBVERORfAa8XQERE3mIQEBH5HIOAiMjnGARERD7HICAi8jkGARGRzzEIiIh8jkFARORzDAIiIp8Leb2A9ejt7dW9e/d6vQwiorpy5syZCVXtW+vr6iII9u7di9OnT3u9DCKiuiIi763n63hqiIjI5xgEREQ+xyAgIvI5BgERkc8xCIiIfK4uWkNERF44MZrEt05exNXpNHZ1NeMLx/bjeCLu9bLKju8IiIiWcWI0ia898zaS81l0xsJIzmfxtWfexonRpNdLKzsGARHRMr518iLCQUFzJAQR98dwUPCtkxe9XlrZMQiIiJZxdTqNWDhY8rFYOIix6bRHK6ocBgER0TJ2dTUjY9olH8uYNnZ2NXu0osphEBARLeMLx/bDtBVpw4Kq+6NpK75wbL/XSys7BgER0TKOJ+L4+mc+gHhbFLMZE/G2KL7+mQ80ZGuI9VEiohUcT8Qb8sC/FN8REBH5HIOAiMjnGARERD7HawRERA1mIWchY9hrf2Eeg4CIqEGkDQtTKQOG5SC65Ga41TAIiIjqXNa0MZUykDXX/y6gGIOAiKhOGZaD6bSBVM7a0vdhEBAR1RnLdjCVNrCQ3VoAFFSsNSQi3xGRpIi8VfSxvxCRURF5Q0R+IiKdlXp8IqJGYzuKyYUcrk5nyhYCQGXro98F8KklH3sWwN2qeg+AcwC+WsHHJyJqCKqKmbSBq1NpzGZMqGpZv3/FgkBVTwKYWvKxn6pqIcZeArCzUo9PRNQI5rImrk5lMJUy4JQ5AAq8vEbweQB/u9InReQxAI8BwO7du6u1JiKimpDKuVVQ03Yq/lie3FksIn8CwALwg5W+RlWfVNWjqnq0r6+veosjIvJQ1rRxbSaDm3PZqoQA4ME7AhH5fQCfBjCo5T7RRURUpwzLwVTKQNoo30Xg9apqEIjIpwB8GcDDqtp4+70REW2Q7SimUgb
ms6Zna6hYEIjIDwEcB9ArImMA/gxuS6gJwLMiAgAvqeofVmoNRES1SlUxl7Ewna7cReD1qlgQqOpvL/Phb1fq8YiI6sVCzsJ0lS4ErwfvLCYiqpKsaWMyZSC3yZlAlcIgICKqMC8vBK8Hg4CIqEIs28F02vT0QvB6MAiIiMrMcRSzGROzGdPzC8HrwSAgIiqjuayJmZQJy6mNC8HrwSAgIiqDjGFjMpWDYdVPABQwCIiItsC03QvBW90cxksMAiKiTXAcxUz+OkC9T8thEBARbVA9XgdYDYOAiGid6vk6wGoYBEREa8iaNqbTBjJGbd0RXC4MAiKiFRiWg+l0fV8IXg8GARHREpbtYCptlHWD+FrGICAiyrMdd5P4uaxV902gjWAQEJHv1dtIiHJjEBCRb6kq5rIWZtIGbMd/AVDAICAi31FVzOcszKbNmtkcxksMAiLylVrbHawWMAiIyBfShoWplNFwN4OVA4OAiBpa1rQxlTKQrbHtIWsJg4CIGlLOsjGdMmt2e8hawiAgooZiO4qplFHz20PWEgYBETWMuayJ6ZS/q6CbwSAgorrHJtDWMAiIqG6xCVQeDAIiqjsZwx0LzSZQeTAIiKhuZAwbM5nG3RfAKwwCIqp5acPCTNrkO4AKYRAQUc1KGxam0yZyDICKYhAQUc3hNYDqYhAQUc0wLAdTKYN3A2/BfNbEyXMTOHFufN2/h0FARJ7j3cBbkzVtvHhhEsOjSbx8aQrWBm+oYxAQkacWchYmF3K8G3iDLNvB6femMTyaxM/enUDWvHUvRTQUwEOH+/DUOr9XxYJARL4D4NMAkqp6d/5j3QD+FsBeAJcB/JaqTldqDURUuyzbwWTKQCrH00Dr5ajizWuzGB5N4vmz45jL3vqzCwUE9+7txkAijo8f7EFXc8T7IADwXQDfBPC/iz72FQBDqvoNEflK/tf/sYJrIKIaY1gOZjMmFnL+2iB+s1QV7yYXMDSaxHOj4xhfyC1+TgB8cFcHBhLbcOxQL9pj4U09RsWCQFVPisjeJR/+LIDj+Z9/D8AJMAiIfCFr2pjLBwCtbWw6jeHRJIZHx3FlKl3yucPbWjGYiOP4kTj62pq2/FjVvkawTVWv539+A8C2Kj8+EVVZ2rAwmzF5N/A6TCzk8NzZcQyPJHH25nzJ53Z1xTDYH8cjR+LY1d1c1sf17GKxqqqIrPi+UEQeA/AYAOzevbtq6yKi8sgYNqbSBm8GW8NcxsTJ8xMYHr2J16/Oovig2NfahEcSfRhIxHEo3goRqcgaqh0EN0Vkh6peF5EdAJIrfaGqPgngSQA4evQoTyQS1Yms6d4MxncAK8vk655DI0mculxa92yPhvDwEffg/yt3diBQoYN/sWoHwTMAfg/AN/I//mOVH5+IKsSwHEyn2QJaiWk7OH3ZrXv+/MKSumc4gAcP9GKwP46P7ulCOBio6toqWR/9IdwLw70iMgbgz+AGwN+JyB8AeA/Ab1Xq8YmoOizbwXTa5M1gy3BU8eZYvu55bvm652B/HA8c6EEsHPRsnZVsDf32Cp8arNRjElH1OI5iJmNiNmOyBlpEVXE+uYChkSSeO5vExIKx+Dm37tmJwUQcD22h7lluvLOYiDZEVTGXsTCT4d7Axa5OFeqeSVydzpR87sj2Ngwk4njkSB96W7de9yw3BgERrVvasDC5wL2BC8bnczhx1u36L6177u5uxkC+8bOzq7x1z3JjEBDRmgzLwWQqxyYQgNmMiRfOj2NoJIk3xkrrnvG2JjySb/wcrGDds9wYBES0IsdRTKcNzGX9PQ4iY9r4xbuTGBq9iVOXp0tOiRXqnoOJOO6uUt2z3BgERHQbXgdw656nLk9haCSJFy9MImuV1j0/cbAXA4k4ju7pQqjKdc9yYxAQ0SJVxXzOwkzKhOX47zqAo4o3iuqe80V1z3BQcF++7nn//h5EPax7lhuDgIgWA2A2bfruQvBadc8P7b5V92yL1kbds9wYBEQ+pqqYy7o
B4Ld3AFeK6p5jS+qeie1tGOyP4/jhPvTUYN2z3BgERD41nzUx47N3AOPzOTx3NomhkSTOJxdKPrenuxkDiTgGEnHc2RXzaIXeYBAQ+UwqZ2Eq5Z97AWYzJk6eG8fQaBJvLlP3LBz8D/S11E3ds9wYBEQ+kTVtTKb8MRY6Y9j4xYUJDI0mb6t7dsTCePiwW/f8wJ3tdVn3LDcGAVGDy1k2plMm0kZjTwU1bQevXJrC8GgSv7gwiVxR3TMWDuLBgz3udM/d9V/3LDcGAVGDsmwHU2kDC9nGDQDbUbwxNoOh0SReOD9xe91zXzcGE9tw//7uhqp7lhuDgKjB2I5ipoHvBlZVnLu5gKHRm3ju7Dgmi+qeAQE+tDjdsw+tUR7i1oN/SkQNQlUxm3GbQE4DBsCVSbfuOTSaxLWZ0rpn/w53uqdf6p7lxiAgqnONfDdwci6L5866jZ93l6t79ufrnp3+qnuuRUQ2tMsZg4Coji3kLEw3WBV0Nm3i+fx0zzevzZZ8rlD3HEzEsb+B656vXJzCj05dxfW5DHa0x/Dovbtw3/7uFb8+IIKmcACxcBDRcBBNocCG/mwYBER1KG249wIYVmMEQMaw8bN3JzA8msTp926vex4/3IfB/jjuuqPx656vXJzCE8PnEQoI2qMhTKZyeGL4PB7HocUwCAYETaEgYuEgmsKBDR/4l2IQENWRtGFhJm0i2wD3AhiWO91zpbrnQ4fc6Z4f2d3pq7rnj05dRSggi3sYx8JBZC0bPz4zhl+/Zwei4QCaQuVtQDEIiGqcqmIhZzXEOAjbUbw+NoPhFeqeH9vndv3v39eNJp/WPa/PZdARDSMQEAREIAJEQgEk57PoqNAexwwCohrlOIq5rIm5jFXXF4FVFWdvzmNoJIkTZ8cxmSqte354dxcGEnE8dLDXt3XPcDCAaDiIWCSIfT0tGF/IoSl4KwjThlXR7S79+adOVMMs28FsxsR81qrrGuh7k6n8dM/x2+qedxXqnkfi6G6JeLRC70RCty7sRsNBBAO3zu//4cMH8LVn3kbasBALB5ExbZi24gvH9ldsPQwCohphO+59ALMZs25vBLs5l8Vz+YP/u+Oldc+9Pbeme97ho7qniKApFMgf9AOIhoIIBFa+sHs8EcfXAXzr5EWMTaexs6sZXzi2H8cT8YqtkUFA5DGnKADq8R3ATNrA8+cmMDx6E29emyv53Lb24rpnq0crrC4RWTzgFw7+G230HE/EK3rgX4pBQOSRet4XOG1Y+Pm7kxgaTeL05SkUL78zFl7czP0Dd7Q3bNe/ICBy69X+Jjr8tYBBQFRl9bormGEVTfe8OFlyD0NzpLju2VVyzrvRBPPVzqb8wb/cVU4vMAiIqqQeA8B2FK9fdeueJ89PYCFXWve8f38PBhNxfKyB657hYABN+Vf70VAQkVDj3dPAICCqsHoLAFXF6I15DI26dc+pJXXPj+Trnp841IvWpsY7hBSqnNH8yAY/3MzWeP8XiWpEvQXA5Xzdc2gkieuz2ZLP3bWjHYP9cTx8uK/h6p6RfKMntkyV0y8YBEQVkDYsTC7U/jC4Qt1zaDSJC+Opks/t623BQKIPA4k4dnQ0Rt1TRIo6/GtXOf2CQUBURlnTxnTaQMao3VlAbt3Tne751vuldc/t7dHFg38j1D3LUeX0AwYBURlkTRsz6drdFziVs/DzoumexXXPruYwjh+JYyDRh7t21F7dcyMjmRuhyukFBgHRFtRyABiWg5fzdc8Xl9Q9WyJBfOJQLwYTcXy4huuea41kDgZksc0TjTRGldMLawaBiHwRwPdVdboK6yGqC7V6Csh2FL+8OoOhkSReeHccqdyt9YWDggf292CgP4779/XURQ1y6Ujm5kgIWcvC3782ht/86M66eA71YD3vCLYBOCUirwL4DoD/p1schCIiXwLwbwEogDcBfE5Vs6v/LiLv1WIALNY9R5J47mwS02lz8XP1Xve8MZdBZyyMQCCAgBS2YAzjxmy
WIVBGa/6tUNX/LCJ/CuDXAHwOwDdF5O8AfFtVL2z0AUXkTgB/BOAuVc3kv9ejAL670e9FVC21GACXJgrTPW+ve37gjnYMJOqv7hlZHM4WRDQUwL7eViTns2gO3Tp1lTHtio5k9qN1vTxQVRWRGwBuALAAdAF4WkSeVdUvb/JxYyJiAmgG8P4mvgdRxeUs9xpAKlcb1wBuFNU9Ly6pe+7vbcFAIo5HEn11UfcsVDmjoQBikSCaQrd3+L9wbH/VRzL70XquETwO4HcBTAB4CsB/UFVTRAIAzgPYUBCo6jUR+UsAVwBkAPxUVX+6zOM+BuAxANi9e/dGHoJoy0zbwXTawELW+wCYTht4/uw4hkaTeHtJ3XNHR3RxtPO+3haPVrg+hXHMxRusr9Xh92Iksx/JWqf7ReTPAXxHVd9b5nP9qjqyoQcU6QLw9wD+NYAZAD8G8LSqfn+l33P06FE9ffr0Rh6GaFNsRzGdNjCftTzdE6BQ9xwaTeLMMnXPR464B//+HW1Vq0dupMYJsMpZC0TkjKoeXevr1nON4M9W+dyGQiDvkwAuqeo4AIjIPwD4OIAVg4Co0mphTwDDcvDSpUkMjybx0sWp0rpnUxAPHezDQKLPk7rnWjVOACVVzsKQNqoPXlQIrgC4X0Sa4Z4aGgTAl/vkicI8oJm0N3sC2I7itSvTGBpN4mfnJ5AquhgdCQVw//5uDCa24WP7uj1tySytccbCQWQtGz8+M4Zfv2cHYuHGnMrpF1UPAlV9WUSeBvAq3AvPrwF4strrIH/zciCcqmLkemG65+11z4/u6cJgIo4HD/aipUbqntfnMuiIhhEICAIiCIgbVMn5LDpiYa+XR1vkyd+y/OmmFU85EVWK4yjmsibmMlbVA2C1uufdhbrnkT50NddG3bO4yrmvpwXjCzk0BW+d7kkbFmucDaI2Xm4QVZhpO5jNmFjIWlW9BnBjNrt48L84saTu2deCgSNxDPTHsb09WrU1raQp392PLjOO+Q8fPsAaZwNjEFBDy5o2ZjPVvQ9gKnVruuc712uz7lmochbm8K9V5WSNs7ExCKjhqCpSho2ZtFHSvKmkhZyFn513p3u+emWZumcijsFEHInt1at7FguIoClc2uHf6DqOJ+I88DcoBgE1jMIF4LmMWZUNYQzLwUsXJzE0msRLFydh2reO/i1NQRw71IfBRBwf3NVZ1rrnevr8S6uc7PDTahgEVPdUFXMZCzOZyldAbUfx6pVpDK9Q93xgfw8G8pu5V6JOuVKf/0uBQzh+JO6e5w9zHDNtDIOA6la1KqCqineuz2FoJInnz43fVvc8uufWdM/mSGX/SRX6/M2REESAtmgQWdPCT157H791L0ex0OYwCKju2I5iPuveBVzJdwAXxxfyjZ9x3JgrrXv+yp23pnt2VqHuGQ66F3aTC1l05ccyFzRHQhibTld8DdS4GARUN6pRAb0+m1k8+F9aUvc80NeCwfwF00rXPSNFw9mKq5x7ulvcscyRW0HAscy0VQwCqnlZ08ZcxsRChSqgUykDJ86OY3j0Jt65Pl/yuTs6b9U99/ZUpu5ZXOUsbLS+UpWTY5mpEhgEVLNSOQuzGRNZs/ybwSzkLLyQr3u+tqTu2d0SwSNH+jDYH8eRbeWve4rI4gG/cPBf72Owz0+VwCCgmuI4ivlcZSqgOdPGS5emMDSSxMuXSuuerU0hHDvUi4F11j03MpK53OOY2eencmMQUE0wLGfxDuBynv8v1D2HRpL42bsTSBfVPZuK6p73baDuudZI5sUOP6ucVCcYBOSpjOGOgEgb5Tv/76jinffnMDSaxPNnxzGTKa173ru3GwOJOB482LOpuufSkczNkRCyloW/f20Mv/nRnRzHTHWHQUCeyBg2JlO5so2AUFVcLJrueXMuV/L5X7mzHYP92/DwoT50NG9tbPKNuQw68xXOgLjn/MPBMG7MZhkCVJcYBFRVpu1gKmWUbQjc+zOZxYP/5cnSLv3BvlYM9Mf
xyJE+bNtC3bN4HHM0FMC+3la3whm6dZ6fFU6qZwwCqgrLdjCTMcuyF7Bb90xiaDSJkSV1zzs7YxhI9GEgEceeTdQ9RcQ98IcCiEWCaAoFb7twzAonNRoGAVWUYTmYyRhI5ewtBcBC1sIL58cxNJrEL6/OlNQ9e1rduudAYuN1z42OYwZY4aTGwyCgishZNmbTW7sJLGfaePHiFIZGb+KVS1O31z0P9+KOjhhOXZrCC+cn8O7N1Ko1TqA845gBVjipsTAIqKy22gKybAevXplZ3Mw9Y5bWPT9+wK173ru3G7+8MrNqjRO4fRxzNMwqJ9FSDALasq3eBLZY9xxJ4sS5ccwW1T2DAcHRPV0Y7I/jwQO9iEVuHciX1jhj4SCylo0fnxnDr9+zgx1+onViENCmOY5iNmNiLrvxKaCqiovjKQzlGz/J+aV1zw58sj+OY6vUPa/PZdAedf8KBwKCYP5Cb3I+i47Y1iqiRH7CIKANsx3FXMYdA73Ru4CvFdU931ta94y3YjDh1j3j66h77uiIYTqdQ2tTGIH8ef60YbHGSbRBDAJat8IY6I1WQCcXcjiR38x99EZp3XNnVwwDR9zpnrt71ncAD4igLRrCFx85iD//p3eQNW3WOIm2gEFAa0obFuaz1oZuApvPmovTPZerexYO/oe3ta67tRMMCDpjEbRFQwgEBIN3bUMwIKxxEm0Rg4CWZdkOFnJuAKz3AnDWtN3N3EeSeOVyad2zLRrCsUN9GEj04Z6dG9vMPRQIoKM5jPZo6LbQYI2TaOsYBLTIcRQpw8JCzkLGWN8eAJbt4MyVafzdqTG8Plb6yj8aCuCBAz0Y7HfrnuHgxubwhINuALQ13R4ARFQ+DAJC1rQXT/2s5+Kvo4q3rs1ieHQcJ84mMZctPWVU2HTlS588hIcO9214PU3hIDpjYbQ08a8nUTXwX5pPOY5iwXC7/+uZAKqquDCewtDITTx3dvy2umdTKICOmPvqPRgQZEwbP3nt/Q0FQUtTCB2xMG/6IqoyBoHPGJaD+azb/FnPq/9r00V1z6nSuueheCsG++N4+swYuprDENw6fRMNB3BjLrPm9y80gNpj4Q2fOiKi8mAQ+ETacPf/Xc+5/4mFXH4z9xXqnvnN3Hd3u3XPly5MYTKVW7zDFwCypoPt7bEVHyMUyL+DyDeAiMg7DIIGZtkO5rNu88dyVj/9M581cfLcBIZGk3j96gyK3yv0tkbwyJE4BvvjOBS/ve756L278MTweWRMG9FwAFnTgeUoHr13122PU6iAtsd4AZioVjAIGlDGsDGXNZE2Vh/9nDVtvHhhEkOjSbxyaQpWUeWnPRrCscPuaOd7dnYs3rm7nPv2d+NxHMKPTl3FjbkMti+zmXs4GEB7lO8AiGoRg6BBOI7in9+6jqdeuIRrsxnsWOZgDLjvEk6/N43hUXcz96x5651CNBTAxw/2YjARx9G9XRs6Z3/f/u5lxz9Hw0F0sAFEVNM8+dcpIp0AngJwNwAF8HlVfdGLtdQ7w3IwlzUx9M5N/NXQ8iOZj+7rwpvXZjE8ksTz58ZL6p6hgODo3i58sn8bHjjQU3Kefyui4SC6miMl00KJqDZ59TLtCQD/rKr/SkQiADglbANUdfGu32x+Xv8PXykdyRwNBTCXtfDfnz0HABhfuFX3FAAf3NWBgcQ2HDvUi/YyTuqMRYLojDEAiOpJ1YNARDoAHAPw+wCgqgYAo9rrqEc5y73xa2GZ6mdhJLNhOZjPWZjPmjDs0q85sq0NA/1xHD/ch762prKujfcAENUvL94R7AMwDuBvROSDAM4AeFxVUx6speYVXv3PZS3kzOWrnxMLOQRF8N5k+raDf1MogN++bxcGEvGKjGdujoTQ1RLmBjBEdcyLIAgB+AiAL6rqyyLyBICvAPjT4i8SkccAPAYAu3fvrvoivWbaDuYy7p6/y236MpcxcTI/3XNp3bNwiqgpHMAff/IwPnagp+zrawoH0c1rAEQNwYsgGAM
wpqov53/9NNwgKKGqTwJ4EgCOHj26sd1P6lTh1f9KQ98yhbrnSBKnLt9e9+zf3o7JlIH5rIEdHc1rbuS+GZFQAF3NEbaAiBpI1f81q+oNEbkqIkdU9SyAQQDvVHsdtUJVkTZspAwL6Zx927l/03Zw+rJb9/z5uxPIFs0FioYDePBALwb74/jono3VPTeKAUDUuLz6V/1FAD/IN4YuAvicR+uoiBOjSXzr5EVcnU5j1zKbpRiWg6xlI2O4/y09+DuqeHNsFsOjy9c979vXjcFEHPeXse65knAwgK6WCFoZAEQNy5N/3ar6SwBHvXjsSjsxmsTXnnkb4aCgMxZGcj6LP/3Ht/CVXAL37utGxrSXPeevqjifXMDQSBInzo4vU/fsxGAijofKXPdcSSgQQGdLGO1RbgJP1Oj4Mq/M/vr5CwgGgKZQEJajiztx/a8XLqH/jvbbvv7qVHpxuufV6dJpnYW65yNH+tDbWt6650oCIuiIhdHZHOYsICKfYBBskaoiZznIGDaylo3Lkym0RUOw7NJz+cUjmcfnczhxNonh0XGcvVk63XN3dzMGEn0Vq3uuJBgQtEXD6IiFN7SNJBHVPwbBBjmOImvZyJoOsqaNnOWUDHbb3h5bdiRzb0sT/umN9zE0ksQbY7Mldc94WxMeOdKHwf5tONDXUtVX4twOkogYBGuwHUXWtN3/LGfFm7oKikcyR0KC6bSJVM7GtZkM3ro+t/h17dEQHj7Sh8FEHHffufp0z0oIBgSdzZFlN4QnIn9hECxh2Q6ylrN48F/PNo7FPrynE7/avw3PvP4+ZjNmySv/aDiATxzsxUAijqN7uhDyYEeuwjWAjliY46CJCEADB8FaFc4C03YP+hnTRs50YNobO/ADbt3zjXzd8+Qydc+P7evGYH8c9+/v8WwWDwOAiFbSkEGwXIXza8+8ja8DeOBgD7Kme4rH3Ulr4wd+oLTu+dzZJCYWbs3NEwAf2n2r7tnmYQVTigKAF4GJaDkNGQTfOnkR4aCgORKCo+pWOW0TTwydx+6erTVxrhTVPceW1j23t2EwEcfxKtY9VyL5TeE7Y2FPTkERUf1oqCAoVDnfy1c4c5aNwkn6SCiA67OZ1b/BCsbnc3jurHvwP3dzoeRze7qbFzdzv7Nr5c3aq6m1KYSulkhFR04QUeOo6yBwnHyHP39ht1DljLdFl61wbm9f/4F6NmPi5LlxDI0m8eYydc+BRByDiTj2V7nuuRqOhCaizairILAdRS4/oydrOTCWdPgLiiuc0XAgfy1A8ei9u1b9/hnDxs8vuKOdT12eLhkF0REL4+HDbt3zA3e2V73uuZpoOIjulgg3hSGiTamLILAcxdh0et1Vzvv2d+NxHMKPTl3FjbkMtq+wkTvgtoZeuTSF4dEkfnFhErmix4iFg/jEoV4MJPrw0d3e1D1XEwkF0N0SQXOkLv43ElGNqosjiO3ohvv89+3vXnEWv+0o3hibwdBoEi+cn8B8Ud0zHBTct7cbg/3bcP/+7pp8lc2R0ERUTr45kqgqzt6cx/BoEs+NjmMydavuGRDgQ4vTPfvQGq3NPxYGABFVQsMfUd6bTOXrnuO4NlPaGurf0YaBhLuZe4/Hdc/VNIWD6GoO8xQQEVVEQx5ZknNZDJ8dx/BIEu+OL1P37M/XPTtro+65klAggO5WbgpDRJXVMEeY2bSJ58+PY3jUne5ZrFbrnisRce+I5p4ARFQNdR0Ea9U9jx/uw2B/HHfdUVt1z9W0RcPoaubdwERUPXUXBIbl4NTlleueDx1yp3t+ZHdnXR1MmyMhdLfmImxMAAAG9ElEQVREEAnVz5qJqDHURxAo8OqV6fx0zwks5Errnh/b1+NO99zXjaYarHuuhjeDEZHX6iIILk4s4N//+I3FXwcE+PCuTgz0b8NDB3trtu65Gt4MRkS1oi6OQlb+3P9dhbrnkTi6WyIer2pzwsEAOpvDno6mJiIqVhdB0Nsawff/4D7cUeN1z9WEAu7
ewNwakohqTV0EQXdLU92GQEAEnc1htEe5MxgR1aa6CIJ6JCJoj4bQ2RzhzmBEVNMYBGUmIu7GMLwXgIjqBIOgjFqb3HcAvBeAiOoJg6AMuDMYEdUzBsEW8GYwImoEDIJN4M1gRNRIeCTbAN4MRkSNiEGwDrwZjIgaGYNgFYWbwTpi3BeAiBoXg2AZIoKOmBsAvBmMiBqdZ0EgIkEApwFcU9VPe7WOYrwZjIj8yMt3BI8DGAHQ7uEaFrU2hdDVEkGYAUBEPuPJUU9EdgL4DQBPefH4xZojIdzZFUO8PcoQICJf8uodwV8B+DKAtpW+QEQeA/AYANyxc1fZF8CbwYiIXFV/CSwinwaQVNUzq32dqj6pqkdV9Wh3T2/ZHj8SCmB7RxR3dMYYAkRE8OYdwYMAPiMi/wJAFEC7iHxfVX+nkg8aDgbQ1RJBaxOLUkRExar+jkBVv6qqO1V1L4BHAQxXMgTCwQD62pqwq7uZIUBEtIyGPTJyHAQR0fp4GgSqegLAiXJ+z0IAtDZxHAQR0Xo0zDuCcNCdB9TGACAi2pC6D4JQIIDOFgYAEdFm1W0QBAOCzuYIJ4ISEW1R3QVBoGggXIAD4YiItqxugkBE0B51N4fnRFAiovKpiyAIBgS7umKcCEpEVAF1cWQNBYQhQERUITy6EhH5HIOAiMjnGARERD7HICAi8jkGARGRzzEIiIh8jkFARORzDAIiIp9jEBAR+ZyoqtdrWJOIjAN4b5O/vRfARBmXUw/4nP3Bb8/Zb88X2Ppz3qOqfWt9UV0EwVaIyGlVPer1OqqJz9kf/Pac/fZ8geo9Z54aIiLyOQYBEZHP+SEInvR6AR7gc/YHvz1nvz1foErPueGvERAR0er88I6AiIhW0fBBICJBEXlNRP7J67VUg4h0isjTIjIqIiMi8oDXa6okEfmSiLwtIm+JyA9FJOr1mipBRL4jIkkReavoY90i8qyInM//2OXlGstphef7F/m/12+IyE9EpNPLNZbbcs+56HN/LCIqIr2VeOyGDwIAjwMY8XoRVfQEgH9W1QSAD6KBn7uI3AngjwAcVdW7AQQBPOrtqirmuwA+teRjXwEwpKqHAAzlf90ovovbn++zAO5W1XsAnAPw1WovqsK+i9ufM0RkF4BfA3ClUg/c0EEgIjsB/AaAp7xeSzWISAeAYwC+DQCqaqjqjLerqrgQgJiIhAA0A3jf4/VUhKqeBDC15MOfBfC9/M+/B+BfVnVRFbTc81XVn6qqlf/lSwB2Vn1hFbTC/2MA+B8AvgygYhd0GzoIAPwV3D9Ax+uFVMk+AOMA/iZ/OuwpEWnxelGVoqrXAPwl3FdK1wHMqupPvV1VVW1T1ev5n98AsM3LxVTZ5wH8X68XUWki8lkA11T19Uo+TsMGgYh8GkBSVc94vZYqCgH4CID/qaofBpBCY50uKJE/J/5ZuAF4B4AWEfkdb1flDXXrf76oAIrInwCwAPzA67VUkog0A/hPAL5W6cdq2CAA8CCAz4jIZQA/AjAgIt/3dkkVNwZgTFVfzv/6abjB0Kg+CeCSqo6rqgngHwB83OM1VdNNEdkBAPkfkx6vp+JE5PcBfBrAv9HG774fgPsi5/X8cWwngFdFZHu5H6hhg0BVv6qqO1V1L9wLiMOq2tCvFlX1BoCrInIk/6FBAO94uKRKuwLgfhFpFhGB+3wb9uL4Mp4B8Hv5n/8egH/0cC0VJyKfgnuq9zOqmvZ6PZWmqm+qalxV9+aPY2MAPpL/d15WDRsEPvZFAD8QkTcAfAjAf/V4PRWTf+fzNIBXAbwJ9+9zQ959KiI/BPAigCMiMiYifwDgGwB+VUTOw3139A0v11hOKzzfbwJoA/CsiPxSRP7a00WW2QrPuTqP3fjvroiIaDV8R0BE5HMMAiIin2MQEBH5HIOAiMjnGARERD7HICAi8jkGARGRzzEIiDZBRO7Nz8WPikhLfk+Eu71eF9Fm8IYyok0Skf8CIAogBnfG03/zeElEm8I
gINokEYkAOAUgC+Djqmp7vCSiTeGpIaLN6wHQCnf+TUNukUn+wHcERJskIs/AHXG+D8AOVf13Hi+JaFNCXi+AqB6JyO8CMFX1/4hIEMAvRGRAVYe9XhvRRvEdARGRz/EaARGRzzEIiIh8jkFARORzDAIiIp9jEBAR+RyDgIjI5xgEREQ+xyAgIvK5/w/li5XIRgBZ0AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot Anscombe 3\n", "sns.regplot(x = \"x\", y = \"y\", data = anscombe_3)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Split the data in half into fold 1 and fold 2\n", "X_fold1, X_fold2, y_fold1, y_fold2 = train_test_split(anscombe_3[[\"x\"]], anscombe_3[\"y\"], test_size=0.5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
3012.0
286.0
259.0
2210.0
2714.0
317.0
\n", "
" ], "text/plain": [ " x\n", "30 12.0\n", "28 6.0\n", "25 9.0\n", "22 10.0\n", "27 14.0\n", "31 7.0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_fold2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the fold 1 data to fit the linear model using the sci-kit learn version:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", " normalize=False)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_fold1 = linear_model.LinearRegression()\n", "lm_fold1.fit(X_fold1, y_fold1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use this linear model to make predictions for the fold 2 data:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "fold2_predictions = lm_fold1.predict(X_fold2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compute the mean squared error for the fold 2 predictions:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.756723420796892" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "((y_fold2 - fold2_predictions)**2).mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's do the reverse. 
Use the fold 2 data to create the linear model:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", " normalize=False)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_fold2 = linear_model.LinearRegression()\n", "lm_fold2.fit(X_fold2, y_fold2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use this linear model to make predictions for the fold 1 data:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "fold1_predictions = lm_fold2.predict(X_fold1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compute the mean squared error for the fold 1 predictions:" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.604766161332178" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "((y_fold1 - fold1_predictions)**2).mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "How do the two mean squared errors compare? What might be happening here?\n", "\n", "To better understand what's happening, let's plot the two training data sets using `regplot()` in Seaborn. \n", "\n", "First plot the fold 1 data, where x is `X_fold1[\"x\"]` and y is `y_fold1`." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.regplot(x = X_fold1[\"x\"], y = y_fold1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next plot the fold 2 data, where x is `X_fold2[\"x\"]` and y is `y_fold2`." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.regplot(x = X_fold2[\"x\"], y = y_fold2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.8" } }, "nbformat": 4, "nbformat_minor": 2 }